Importing Libraries
import numpy as np
import matplotlib.pyplot as plt
from numpy.linalg import inv
from IPython.display import Image
%matplotlib inline
Image(filename = 'Images/q1.jpg')
#Function to implement one dimensional gradient ascent
def one_d_grad_ascent(lr, x_0, stopping_param_value):
    """Maximise f(x) = -x**2 by one-dimensional gradient ascent.

    Parameters:
        lr -> Learning rate.
        x_0 -> Starting point.
        stopping_param_value -> Number of places after the decimal point to
            which the new and old x values are compared. For example, if the
            value is 2, two places after the decimal point of x_0 and x_1
            are compared.

    Stops once consecutive x values agree (to the requested precision) for
    5 iterations in a row, with a bail-out after 11000 iterations. Prints a
    trace per iteration, scatter-plots the visited (x, f(x)) points, and
    returns them as (X, Y).
    """
    closing_criteria = 0
    n_iter = 0
    X = list()
    Y = list()
    # (Fix: removed a redundant f_x computation here; it was always
    # overwritten on the first loop iteration before being used.)
    while(closing_criteria < 5):
        # Compute the derivative and the current value of the function.
        d_f_x = -2*x_0
        f_x = -1*(x_0**2)
        x_1 = x_0 + lr*d_f_x
        n_iter = n_iter + 1
        # Compare new and old x values to stopping_param_value decimals.
        # If they match 5 times in a row the iterations stop; otherwise the
        # bail-out number of iterations is 11000.
        if(round(x_1, stopping_param_value) == round(x_0, stopping_param_value)):
            closing_criteria = closing_criteria + 1
        elif(n_iter == 11000):
            closing_criteria = 5
        else:
            closing_criteria = 0
        print("\n")
        print("Iteration number:", n_iter)
        print("Function value (f_x) is:", f_x)
        print("Gradient value (d_f_x) is:", d_f_x)
        print("Input value (x_0) is:", x_0)
        X.append(x_1)
        Y.append(f_x)
        x_0 = x_1
    print("Total number of iterations are:", n_iter)
    plt.scatter(X,Y)
    plt.show()
    return(X,Y)
# Run the 1-D ascent twice: a small learning rate with a loose (2-decimal)
# stopping criterion, then a larger rate with a stricter (5-decimal) one.
X,Y = one_d_grad_ascent(0.05,-20,2)
X,Y = one_d_grad_ascent(0.1,49,5)
From the above implementation, we observe that a more stringent stopping criterion increases the number of iterations; but since the function is one-dimensional, we are not able to see a difference in the graph.
Also, with a smaller learning rate each update changes x only marginally, so more iterations are needed to converge and satisfy the defined stopping criterion.
# Display the question prompts for the following sections (notebook images).
Image(filename = 'Images/q4.jpg')
Image(filename = 'Images/q5.jpg')
Image(filename = 'Images/q5_2.jpg')
Image(filename = 'Images/q6.jpg')
# Starting point and the quadratic form's matrix for the 2-D problem.
x_0 = np.matrix('2;-3')
a = np.matrix('1,2;2,8')

def f(x_0):
    """Return f(x) = -x.T @ a @ x (a 1x1 matrix) for a column vector x."""
    return -(x_0.T @ a @ x_0)

def grad(x):
    """Return the gradient of f at column vector x: -(a.T + a) @ x.

    (Fix: the parameter was previously named `f`, shadowing the function
    f defined above; all call sites pass it positionally, so the rename
    is safe.)
    """
    return -(a.T + a) @ x
def two_d_grad_ascent(x_0, lr, stopping_param_value):
    """Gradient ascent on the 2-D quadratic f(x) = -x.T @ a @ x.

    Parameters:
        x_0 -> Starting point (2x1 np.matrix column vector).
        lr -> Learning rate.
        stopping_param_value -> Number of places after the decimal point to
            which the new and old coordinate values are compared.

    Stops once both coordinates agree (to the requested precision) for 60
    iterations in a row, with a bail-out after 11000 iterations. Returns
    (X, Y): the x- and y-coordinates of every visited point, for plotting.
    """
    closing_criteria = 0
    n_iter = 0
    X = [float(x_0[0])]
    Y = [float(x_0[1])]
    while(closing_criteria < 60):
        # Compute the gradient and take an ascent step.
        # (Fix: removed a dead per-iteration evaluation of f(x_0) whose
        # result was never used.)
        der = grad(x_0)
        x_1 = x_0 + lr*der
        # Compare new and old coordinates to stopping_param_value decimals.
        # If they match 60 times in a row the iterations stop; otherwise the
        # bail-out number of iterations is 11000.
        if(round(float(x_1[0]), stopping_param_value) == round(float(x_0[0]), stopping_param_value) and round(float(x_1[1]), stopping_param_value) == round(float(x_0[1]), stopping_param_value)):
            closing_criteria = closing_criteria + 1
        elif(n_iter == 11000):
            closing_criteria = 60
        else:
            closing_criteria = 0
        X.append(float(x_1[0]))
        Y.append(float(x_1[1]))
        x_0 = x_1
        n_iter = n_iter + 1
    print('Point of Convergence: ',x_1)
    print('No. of iterations',n_iter)
    return (X,Y)
# Run the 2-D ascent with varying learning rates and stopping precisions.
X,Y = two_d_grad_ascent(x_0, 0.1,2)
X,Y = two_d_grad_ascent(x_0, 0.01,2)
X,Y = two_d_grad_ascent(x_0, 0.1,6)
As we can see from the above results, decreasing the learning rate increases the number of iterations, since convergence is achieved later. A stricter stopping criterion has the same effect.
## Evaluate f on a 30x30 grid and overlay the ascent trajectory.
n = 30
ex1 = np.linspace(-10, 10, num=n)
ex2 = np.linspace(-10, 10, num=n)
grid1, grid2 = np.meshgrid(ex1, ex2)
## Fill the grid point by point (ufuncs would also work).
z = np.empty_like(grid1)
for i, j in np.ndindex(z.shape):
    z[i, j] = f(np.array([grid1[i, j], grid2[i, j]]))
## Contour plot with the visited points marked in red.
plt.figure(figsize=(11,10))
cs = plt.contour(grid1, grid2, z,
                 levels = [-60, -45,-30, -15, -10, -5, 0],
                 colors = 'black')
plt.clabel(cs, inline=1, fontsize=10)
plt.scatter(X,Y, c='red', s=100)
plt.plot(X,Y, c='red')
plt.show()
## Same contour visualization for the second ascent run.
n = 30
ex1 = np.linspace(-10, 10, num=n)
ex2 = np.linspace(-10, 10, num=n)
grid1, grid2 = np.meshgrid(ex1, ex2)
## Fill the grid by evaluating f at every (grid1, grid2) pair.
z = np.empty_like(grid1)
rows, cols = z.shape
for r in range(rows):
    for c in range(cols):
        point = np.array([grid1[r, c], grid2[r, c]])
        z[r, c] = f(point)
## Contour plot with the visited points marked in red.
plt.figure(figsize=(11,10))
contour_set = plt.contour(grid1, grid2, z,
                          levels = [-60, -45,-30, -15, -10, -5, 0],
                          colors = 'black')
plt.clabel(contour_set, inline=1, fontsize=10)
plt.scatter(X,Y, c='red', s=100)
plt.plot(X,Y, c='red')
plt.show()
Here we again see the same result: a stricter stopping criterion and a smaller learning rate both lead to a higher number of iterations.
# Starting point and the quadratic form's matrix for the 5-D problem.
x_0 = np.matrix('10;20;30;40;50')
A = np.matrix('11,4,7,10,13;4,17,10,13,16;7,10,23,16,19;10,13,16,29,22;13,16,19,22,35')

def f(x_0):
    """Return f(x) = -x.T @ A @ x (a 1x1 matrix) for a column vector x."""
    return -(x_0.T @ A @ x_0)

def grad(x):
    """Return the gradient of f at column vector x: -(A.T + A) @ x.

    (Fix: the parameter was previously named `f`, shadowing the function
    f defined above; all call sites pass it positionally, so the rename
    is safe.)
    """
    return -(A.T + A) @ x
def five_d_grad_ascent(x_0, lr, stopping_param_value):
    """Gradient ascent on the 5-D quadratic f(x) = -x.T @ A @ x.

    Parameters:
        x_0 -> Starting point (5x1 np.matrix column vector).
        lr -> Learning rate.
        stopping_param_value -> Number of places after the decimal point to
            which the new and old coordinate values are compared. A default
            break also fires if the function crosses 11000 iterations
            without converging.

    Stops once all five coordinates agree (to the requested precision) for
    20 iterations in a row. Returns (X, Y): the first two coordinates of
    every visited point (only these are tracked, for 2-D plotting).
    """
    closing_criteria = 0
    n_iter = 0
    X = [float(x_0[0])]
    Y = [float(x_0[1])]
    while(closing_criteria < 20):
        # (Fix: removed a dead per-iteration evaluation of f(x_0) whose
        # result was never used.)
        der = grad(x_0)
        x_1 = x_0 + lr*der
        # Compare all five coordinates to stopping_param_value decimals.
        # (Bug fix: coordinate 1 was previously compared with a hard-coded
        # 2 decimal places instead of stopping_param_value.)
        # If they match 20 times in a row the iterations stop; otherwise the
        # bail-out number of iterations is 11000.
        if all(round(float(x_1[k]), stopping_param_value) == round(float(x_0[k]), stopping_param_value) for k in range(5)):
            closing_criteria = closing_criteria + 1
        elif(n_iter == 11000):
            closing_criteria = 20
        else:
            closing_criteria = 0
        X.append(float(x_1[0]))
        Y.append(float(x_1[1]))
        x_0 = x_1
        n_iter = n_iter + 1
    print("Point of Convergence:",x_0)
    print("No. of iterations:", n_iter)
    return (X,Y)
# Re-create the starting point and matrix before each run, then sweep
# learning rates and stopping precisions for the 5-D ascent.
x_0 = np.matrix('10;20;30;40;50')
A = np.matrix('11,4,7,10,13;4,17,10,13,16;7,10,23,16,19;10,13,16,29,22;13,16,19,22,35')
X,Y = five_d_grad_ascent(x_0, 0.01,2)
# Same learning rate with a stricter (6-decimal) criterion.
x_0 = np.matrix('10;20;30;40;50')
A = np.matrix('11,4,7,10,13;4,17,10,13,16;7,10,23,16,19;10,13,16,29,22;13,16,19,22,35')
X,Y = five_d_grad_ascent(x_0, 0.01,6)
# Smaller learning rate, 5-decimal criterion.
x_0 = np.matrix('10;20;30;40;50')
A = np.matrix('11,4,7,10,13;4,17,10,13,16;7,10,23,16,19;10,13,16,29,22;13,16,19,22,35')
X,Y = five_d_grad_ascent(x_0, 0.006,5)
# Large learning rate with a very strict criterion — may hit the
# 11000-iteration bail-out instead of converging.
x_0 = np.matrix('10;20;30;40;50')
A = np.matrix('11,4,7,10,13;4,17,10,13,16;7,10,23,16,19;10,13,16,29,22;13,16,19,22,35')
X,Y = five_d_grad_ascent(x_0, 0.1,9)
For a higher learning rate, the 5-D gradient ascent is unable to converge before reaching the iteration limit. We see that as the number of dimensions increases, it becomes more and more difficult to choose a learning rate for which the ascent converges.
In general, the following observations were made:
1. As the learning rate decreases, the number of iterations increases.
2. The number of iterations depends on the distance of the input vector from the maximum.
# Condition number of the matrix from the ratio of its extreme eigenvalues,
# compared against NumPy's built-in condition-number routine.
a = np.matrix('11 4 7 10 13;4 17 10 13 16;7 10 23 16 19;10 13 16 29 22;13 16 19 22 35')
eig_result = np.linalg.eig(a)
condition_num = eig_result[0].max() / eig_result[0].min()
print(condition_num)
print(np.linalg.cond(a))
Note: `np.linalg.cond` defaults to the 2-norm condition number, i.e. the ratio of the largest to the smallest singular value. For a symmetric matrix the singular values are the absolute values of the eigenvalues, so the eigenvalue-ratio computation above agrees with NumPy's only when all eigenvalues are positive.
# A matrix with identical rows; its determinant should be zero.
A=np.matrix('1 1;1 1')
#Compute the determinant. (Left as a bare expression so the notebook
#displays the value.)
np.linalg.det(A)
Here, we can see that the determinant of the matrix is 0. This is the condition for a matrix to be singular. Hence we can say that the matrix is singular.
def func(alpha, A):
    """Regularize A by adding alpha on the diagonal and inspect conditioning.

    Parameters:
        alpha -> Scalar added to the diagonal (A + alpha*I).
        A -> Square matrix to regularize.

    Returns a tuple (eigenvalue, condition_number) where `eigenvalue` is the
    full np.linalg.eig output of the regularized matrix.

    (Generalized: the identity now matches A's size instead of being
    hard-coded to 2x2; behavior is unchanged for the existing 2x2 callers.)
    """
    c = A + alpha * np.eye(A.shape[0])
    eigenvalue = np.linalg.eig(c)
    condition_number = np.linalg.cond(c)
    return eigenvalue, condition_number
# Regularize the singular matrix with increasing alpha and observe how the
# eigenvalues and condition number change.
A = np.matrix('1,1;1,1')
eigenvalue,condition_num = func(0.01,A)
print(eigenvalue)
print(condition_num)
eigenvalue,condition_num = func(1,A)
print(eigenvalue)
print(condition_num)
eigenvalue,condition_num = func(100,A)
print(eigenvalue)
print(condition_num)
# accept the quadratic eq
A = np.matrix('1,2;2,8')
# accept x y values
x_0 = np.matrix('9;-10')
# Regularize A with alpha = 0.01 to improve its conditioning.
A = A + (0.01*np.eye(2))

def f(x_0):
    """Return f(x) = -x.T @ A @ x (a 1x1 matrix) for a column vector x."""
    return -(x_0.T @ A @ x_0)

def grad(x):
    """Return the gradient of f at column vector x: -(A.T + A) @ x.

    (Fix: the parameter was previously named `f`, shadowing the function
    f defined above; call sites are positional, so the rename is safe.)
    """
    return -(A.T + A) @ x
X,Y = grad_desc_2d(x_0, 0.01, 1)
## Contour plot of the regularized (alpha = 0.01) quadratic with the path.
n = 30
ex1 = np.linspace(-10, 10, num=n)
ex2 = np.linspace(-10, 10, num=n)
grid1, grid2 = np.meshgrid(ex1, ex2)
## Evaluate f at each grid point.
z = np.empty_like(grid1)
for i, j in np.ndindex(z.shape):
    z[i, j] = f(np.array([grid1[i, j], grid2[i, j]]))
## Draw the contours and the visited points in red.
plt.figure(figsize=(9,9))
cs = plt.contour(grid1, grid2, z,
                 levels = np.arange(start = -50, stop = 50, step = 5),
                 colors = 'black')
plt.clabel(cs, inline=1, fontsize=10)
plt.scatter(X,Y, c='red', s=100)
plt.plot(X,Y, c='red')
plt.show()
# accept the quadratic eq
A = np.matrix('1,2;2,8')
# accept x y values
x_0 = np.matrix('9;-10')
# Regularize A with alpha = 1.
A = A + (1*np.eye(2))

def f(x_0):
    """Return f(x) = -x.T @ A @ x (a 1x1 matrix) for a column vector x."""
    return -(x_0.T @ A @ x_0)

def grad(x):
    """Return the gradient of f at column vector x: -(A.T + A) @ x.

    (Fix: the parameter was previously named `f`, shadowing the function
    f defined above; call sites are positional, so the rename is safe.)
    """
    return -(A.T + A) @ x
X,Y = grad_desc_2d(x_0, 0.01, 1)
## Contour plot of the regularized (alpha = 1) quadratic with the path.
n = 30
ex1 = np.linspace(-10, 10, num=n)
ex2 = np.linspace(-10, 10, num=n)
grid1, grid2 = np.meshgrid(ex1, ex2)
## Evaluate f at each grid point.
z = np.empty_like(grid1)
rows, cols = z.shape
for r in range(rows):
    for c in range(cols):
        z[r, c] = f(np.array([grid1[r, c], grid2[r, c]]))
## Draw the contours and the visited points in red.
plt.figure(figsize=(9,9))
contour_set = plt.contour(grid1, grid2, z,
                          levels = np.arange(start = -50, stop = 50, step = 5),
                          colors = 'black')
plt.clabel(contour_set, inline=1, fontsize=10)
plt.scatter(X,Y, c='red', s=100)
plt.plot(X,Y, c='red')
plt.show()
# accept the quadratic eq
A = np.matrix('1,2;2,8')
# accept x y values
x_0 = np.matrix('9;-10')
# Regularize A with alpha = 100 (strongly diagonal-dominant).
A = A + (100*np.eye(2))

def f(x_0):
    """Return f(x) = -x.T @ A @ x (a 1x1 matrix) for a column vector x."""
    return -(x_0.T @ A @ x_0)

def grad(x):
    """Return the gradient of f at column vector x: -(A.T + A) @ x.

    (Fix: the parameter was previously named `f`, shadowing the function
    f defined above; call sites are positional, so the rename is safe.)
    """
    return -(A.T + A) @ x
X,Y = grad_desc_2d(x_0, 0.001, 2)
## Contour plot of the regularized (alpha = 100) quadratic with the path.
## The wider level range reflects the much larger function values.
n = 30
ex1 = np.linspace(-10, 10, num=n)
ex2 = np.linspace(-10, 10, num=n)
grid1, grid2 = np.meshgrid(ex1, ex2)
## Evaluate f at each grid point.
z = np.empty_like(grid1)
for i, j in np.ndindex(z.shape):
    z[i, j] = f(np.array([grid1[i, j], grid2[i, j]]))
## Draw the contours and the visited points in red.
plt.figure(figsize=(9,9))
cs = plt.contour(grid1, grid2, z,
                 levels = np.arange(start = -4000, stop = 4000, step = 200),
                 colors = 'black')
plt.clabel(cs, inline=1, fontsize=10)
plt.scatter(X,Y, c='red', s=100)
plt.plot(X,Y, c='red')
plt.show()
Hence we have the following observations:
1. As the alpha value increases
a. The condition number decreases
b. The number of iterations decreases
def normalrandomsamples(R, N):
    """Draw R samples of size N from a Normal(0, sd=2) distribution and
    print statistics of the resulting sample means (mean of means,
    standard deviation, and the 10th percentile)."""
    # One mean per repetition; same RNG call sequence as a plain loop.
    sample_means = [np.mean(np.random.normal(loc = 0.0, scale = 2, size = N))
                    for _ in range(R)]
    mean_of_means = np.mean(sample_means)
    std_dev = np.std(sample_means)
    confint = np.percentile(sample_means, 10)
    print('For sample size',N,'&',R,'repetitions:-')
    print('The mean is:',mean_of_means)
    print('The standard deviation is:',std_dev)
    print('The percentile value is:',confint)
# Compare sampling-distribution statistics across increasing sample sizes.
normalrandomsamples(1000,10)
normalrandomsamples(1000,1000)
normalrandomsamples(1000,100000)
Here, we observe that larger sample sizes give more accurate estimates: the standard deviation of the sample means shrinks and the mean of means settles closer to the true mean of 0.
We first plot the Pareto distribution
# Histogram of Pareto(a=1) draws on logarithmically spaced bins.
pareto_draws = np.random.pareto(a = 1, size=1000)
log_bins = np.logspace(np.log10(1),np.log10(10), 50)
plt.hist(pareto_draws, bins=log_bins)
plt.gca().set_xscale("log")
plt.show()
# A larger sample shown on a log-log scale.
pareto_draws = np.random.pareto(a = 1, size=10000)
plt.loglog(pareto_draws)
plt.show()
def paretorandomsamples(a,R,N):
    """Draw R samples of size N from a Pareto(a) distribution (shifted so
    values start at 1) and print statistics of the resulting sample means
    (mean of means, standard deviation, and the 10th percentile)."""
    # One mean per repetition; same RNG call sequence as a plain loop.
    sample_means = [np.mean(np.random.pareto(a=a, size = N)+1)
                    for _ in range(R)]
    mean_of_means = np.mean(sample_means)
    std_dev = np.std(sample_means)
    confint = np.percentile(sample_means, 10)
    print('For sample size',N,'&',R,'repetitions:-')
    print('The mean is:',mean_of_means)
    print('The standard deviation is:',std_dev)
    print('The percentile value is:',confint)
# Repeat the sampling-distribution experiment for the Pareto distribution.
paretorandomsamples(1,1000,10)
paretorandomsamples(1,1000,1000)
paretorandomsamples(1,1000,100000)
Here, we see that the mean, standard deviation and percentile value increase with the sample size. This is expected: a Pareto distribution with a = 1 has an infinite theoretical mean, so the sample means do not converge but keep growing as more samples are drawn.